import pandas as pd
import seaborn as sns
import plotly.express as px
import scipy.stats as stats
import plotly.io as pio
# Render Plotly figures with the 'notebook' renderer.
pio.renderers.default = 'notebook'

# Load the dataset from the current working directory.
# NOTE(review): presumably one row per Miss Universe winner -- confirm.
df = pd.read_csv('miss_universe.csv')

# Count how many rows each country has. These are raw counts, not
# probabilities; value_counts() returns them sorted from most to fewest,
# which is what gives the bar chart its ordering.
country_counts = df['Country'].value_counts()

# Bar chart of the per-country counts, with the y-axis relabelled.
fig = px.bar(
    country_counts,
    x=country_counts.index,
    y=country_counts,
    title='Miss Universe Winners by country',
).update_yaxes(title='Number of wins')
fig.show()
# Share of the dataset held by each hair color: the per-color row counts
# divided by the total number of rows in the frame.
hair_color_counts = df['Hair_Color'].value_counts()
total_rows = len(df)
hair_color_probabilities = hair_color_counts.div(total_rows)

# Pie chart with one slice per hair color, sized by its share.
fig = px.pie(
    hair_color_probabilities,
    values=hair_color_probabilities,
    names=hair_color_probabilities.index,
    title='Probability of Winning Miss Universe by Hair Color',
)
fig.show()
# Scatter plot with Height on the x-axis and Age on the y-axis, overlaid
# with an ordinary-least-squares trendline.
fig = px.scatter(
    df,
    x='Height',
    y='Age',
    trendline='ols',
)

# Pearson correlation between the same two columns; only the coefficient is
# reported, the accompanying p-value is discarded.
heights, ages = df['Height'], df['Age']
corr, _ = stats.pearsonr(heights, ages)
print('Pearson correlation coefficient:', corr)

# Display the figure after the coefficient has been printed.
fig.show()
# Output: Pearson correlation coefficient: 0.10453379189407233
# Test whether country and hair color are associated, and display the
# country x hair-color counts as an annotated heatmap.
from matplotlib import pyplot as plt

country = df['Country']
hair_color = df['Hair_Color']

# Observed counts only. The chi-square test must NOT see the 'All' row and
# column that margins=True appends: chi2_contingency would treat those totals
# as one more category, distorting the statistic, the degrees of freedom and
# the p-value.
observed = pd.crosstab(country, hair_color)
chi2, pval, _, _ = stats.chi2_contingency(observed)

# The same table with row/column totals, used purely for display.
cont_table = pd.crosstab(country, hair_color, margins=True)

# Heatmap of the counts (including the 'All' totals), each cell annotated
# with its value; the colorbar is hidden.
sns.heatmap(cont_table, annot=True, cmap='YlGnBu', cbar=False)

# Report the test result in the plot title.
plt.title(f'Chi-square test statistic: {chi2:.2f} (p = {pval:.2f})')
plt.show()